import argparse
from run_setups import train_run


def run(config_dict: dict) -> None:
    """Start a run by forwarding ``config_dict`` to ``train_run``.

    Args:
        config_dict: Run configuration. When this file is executed as a
            script it is the parsed command-line options converted to a
            plain dict via ``vars()``.

    Returns:
        None. Whatever ``train_run`` returns is discarded.
    """
    train_run(config_dict)


def parse_bool(value):
    """Convert a command-line token into a real ``bool``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text -- including ``"False"`` -- becomes ``True``. This
    converter interprets the text instead.

    Args:
        value: The raw option value (a ``str``), or an actual ``bool``.

    Returns:
        ``True`` for ``true/t/yes/y/1`` and ``False`` for ``false/f/no/n/0``
        (case-insensitive, surrounding whitespace ignored). The empty string
        maps to ``False``, matching what ``bool("")`` produced before.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


def build_parser():
    """Build the command-line parser for this script.

    Kept separate from the ``__main__`` guard so the option definitions can
    be imported and exercised without starting a run.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace, passed through
        ``vars()``, yields the configuration dict handed to ``run``.
    """
    parser = argparse.ArgumentParser(description="AI collusion")

    parser.add_argument(
        "--seed",
        type=int,
        default=12345,
        help="The random number generator seed; default: 12345",
    )

    parser.add_argument(
        "--max_steps",
        type=int,
        default=5000000,
        help="Number of steps until run stops",
    )

    parser.add_argument(
        "--algorithm",
        type=str,
        default="PPO",
        help="DeepRL algorithm used for supervisor",
    )

    parser.add_argument(
        "--tot_num_reward_steps",
        type=int,
        default=100,
        help="Number of reward steps for stackelberg approach",
    )

    parser.add_argument(
        "--tot_num_eq_steps",
        type=int,
        default=1000,
        help="Number of equilibrium (qtable update) steps for Stackelberg approach",
    )

    parser.add_argument(
        "--frac_excluded_eq_steps",
        # Plain ``float`` instead of a lambda wrapper: same conversion, and
        # argparse reports "invalid float value" rather than "<lambda>".
        type=float,
        default=0.0,
        help="Sets the fraction of eq steps that are not exposed to the learner",
    )

    parser.add_argument(
        "--experiment_type",
        type=str,
        default="simple_matrix",
        help="Type of experiment to run. Options: simple_matrix, simple_Bayesian, mspm, matrix_design",
    )

    parser.add_argument(
        "--matrix_game_name",
        type=str,
        default="game_1",
    )

    parser.add_argument(
        "--randomized",
        # ``type=bool`` would turn "--randomized False" into True; parse the
        # text explicitly. ``nargs="?"`` + ``const=True`` additionally lets a
        # bare "--randomized" switch the option on.
        type=parse_bool,
        nargs="?",
        const=True,
        default=False,
        help="Boolean option; accepts true/false, yes/no or 1/0 (case-insensitive). "
             "Passing the flag without a value means true; default: false",
    )

    parser.add_argument(
        "--critic_obs",
        type=str,
        default="none",
        help="Determines which additional info is given to critic network. Options: flag, none, full",
    )

    # NOTE(review): this option is deliberately left as a string ("True" by
    # default). How the value is interpreted downstream is not visible from
    # this file -- confirm before converting it to a real boolean.
    parser.add_argument(
        "--fix_episode_actions",
        type=str,
        default="True",
        help="If true, keep observation-action mapping so that leader policy behaves deterministically during each StackPOMDP episode",
    )

    parser.add_argument(
        "--randomization_type",
        type=str,
        default="linear",
        help="Determines how to convert weights into probabilities if policy actions have to be interpreted as probabilities. Options: linear, logit",
    )

    parser.add_argument(
        "--followers_algorithm",
        type=str,
        default="MW",
        help="Determines the followers' learning algorithm. Options: MW, Qlearning",
    )

    parser.add_argument(
        "--num_followers_messages",
        type=int,
        default=1,
        help="Determines the number of messages available to the followers",
    )

    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()

    # vars() exposes the namespace as a plain dict keyed by option name.
    config_dict = vars(args)

    run(config_dict)